import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from PIL import Image
from torchvision import transforms, datasets
import requests
from pathlib import Path
import tarfile
from torch.utils.data import DataLoader, random_split
import seaborn as sns
from sklearn.metrics import confusion_matrix

Week 1: Image Processing and Neural Networks Introduction
1 About Me
I received my Ph.D. in (quantitative) Political Science from the University of Southern California in 2016 before transitioning into a career in artificial intelligence.
When not building AI systems, I tinker with everything I can manage, explore San Diego with my family, and train mixed martial arts.
1.1 Industry Experience
- Senior Data Scientist @ Intuit (2016–2019)
- Senior Manager, Data Science @ Oportun (IPO 2019) (2019–2020)
- Principal Data Scientist @ Figure (IPO 2025) (2020–2022)
- Machine Learning Architect @ ResMed (2022–2023)
- Principal Data Scientist, AI Group @ Labcorp (2023–2025)
- Founder @ CoreBrief (2025–Present)
1.2 Academic Positions
- Adjunct Professor, Applied AI @ University of San Diego (2023–Present)
- Lecturer @ UCSD Halıcıoğlu Data Science Institute (2021–Present)
2 Module Overview
This week introduces fundamental concepts in computer vision and basic neural network architectures using PyTorch. We’ll use Google’s 5 Flowers dataset to demonstrate key concepts in deep learning and computer vision.
3 Understanding (PyTorch) Tensors in Computer Vision
3.1 Image Tensors in Computer Vision
Let’s explore how images are represented as tensors, using real examples:
def explore_image_tensors():
    """Demonstrate image-as-tensor representations in PyTorch.

    Builds a 5x5 grayscale image (2-D tensor), a 5x5 RGB image in both
    HWC and CHW layouts, and a 4-D batch tensor, printing the values and
    visualizing each with matplotlib.
    """
    # Create a simple 5x5 grayscale smiley face
    grayscale = torch.zeros((5, 5))
    # Draw eyes
    grayscale[1, 1] = 1.0  # Left eye
    grayscale[1, 3] = 1.0  # Right eye
    # Draw smile
    grayscale[3, 1:4] = 1.0  # Mouth

    # Create a tiny RGB image with a heart shape
    rgb = torch.zeros((5, 5, 3))
    # Bright red heart
    rgb[1, 1:4, 0] = 1.0  # Top red line
    rgb[2, 0:5, 0] = 1.0  # Middle red line
    rgb[3, 1:4, 0] = 1.0  # Bottom red line
    # Light blue background (mix of green and blue)
    rgb[0:5, 0:5, 1] = 0.4  # Green component
    rgb[0:5, 0:5, 2] = 0.8  # Blue component - stronger to make it pop

    # Convert to PyTorch's preferred CHW format
    rgb_pytorch = rgb.permute(2, 0, 1)  # Channels × Height × Width

    # Print actual tensor values
    print("\n1. Grayscale Image (2D tensor):")
    print("Shape:", grayscale.shape, "- (Height, Width)")
    print("Values (brighter = higher number):")
    print(grayscale)
    print("\n2. RGB Image - Standard Format (HWC):")
    print("Shape:", rgb.shape, "- (Height, Width, Channels)")
    print("\nFull tensor (each pixel shows [R, G, B] values):")
    print(rgb)
    print("\n3. Same RGB Image - PyTorch Format (CHW):")
    print("Shape:", rgb_pytorch.shape, "- (Channels, Height, Width)")
    print("\nRed channel (first channel):")
    print(rgb_pytorch[0])  # Shows where red is present
    print("\nGreen channel (second channel):")
    print(rgb_pytorch[1])  # Shows where green is present
    print("\nBlue channel (third channel):")
    print(rgb_pytorch[2])  # Shows where blue is present

    # Visualize the tensors
    plt.figure(figsize=(15, 5))
    # Show grayscale
    plt.subplot(131)
    plt.imshow(grayscale, cmap='gray')
    plt.title('Grayscale Image\n(Height × Width)')
    plt.colorbar(label='Pixel Values')
    plt.axis('off')
    # Show RGB
    plt.subplot(132)
    plt.imshow(rgb)
    # BUG FIX: the image is 5x5x3, not 2x2x3 as the original title claimed
    plt.title('RGB Image\nHeight × Width × Channels\n(5 × 5 × 3)')
    plt.axis('off')

    # Show individual RGB channels in a separate figure
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
    # Show channels with proper value range
    axes[0].imshow(rgb_pytorch[0], cmap='gray', vmin=0, vmax=1)
    axes[0].set_title('Red Channel\n(0 = black, 1 = white)')
    axes[0].axis('off')
    axes[1].imshow(rgb_pytorch[1], cmap='gray', vmin=0, vmax=1)
    axes[1].set_title('Green Channel\nValue = 0.4 everywhere')
    axes[1].axis('off')
    axes[2].imshow(rgb_pytorch[2], cmap='gray', vmin=0, vmax=1)
    axes[2].set_title('Blue Channel\nValue = 0.8 everywhere')
    axes[2].axis('off')
    plt.tight_layout()
    plt.show()

    # Example of a 4D batch tensor: stack copies along a new batch dimension
    batch_size = 2
    batch = torch.stack([rgb_pytorch] * batch_size)
    print("\n4. Batch of Images (4D tensor):")
    print("Shape:", batch.shape, "- (Batch, Channels, Height, Width)")

explore_image_tensors()
1. Grayscale Image (2D tensor):
Shape: torch.Size([5, 5]) - (Height, Width)
Values (brighter = higher number):
tensor([[0., 0., 0., 0., 0.],
[0., 1., 0., 1., 0.],
[0., 0., 0., 0., 0.],
[0., 1., 1., 1., 0.],
[0., 0., 0., 0., 0.]])
2. RGB Image - Standard Format (HWC):
Shape: torch.Size([5, 5, 3]) - (Height, Width, Channels)
Full tensor (each pixel shows [R, G, B] values):
tensor([[[0.0000, 0.4000, 0.8000],
[0.0000, 0.4000, 0.8000],
[0.0000, 0.4000, 0.8000],
[0.0000, 0.4000, 0.8000],
[0.0000, 0.4000, 0.8000]],
[[0.0000, 0.4000, 0.8000],
[1.0000, 0.4000, 0.8000],
[1.0000, 0.4000, 0.8000],
[1.0000, 0.4000, 0.8000],
[0.0000, 0.4000, 0.8000]],
[[1.0000, 0.4000, 0.8000],
[1.0000, 0.4000, 0.8000],
[1.0000, 0.4000, 0.8000],
[1.0000, 0.4000, 0.8000],
[1.0000, 0.4000, 0.8000]],
[[0.0000, 0.4000, 0.8000],
[1.0000, 0.4000, 0.8000],
[1.0000, 0.4000, 0.8000],
[1.0000, 0.4000, 0.8000],
[0.0000, 0.4000, 0.8000]],
[[0.0000, 0.4000, 0.8000],
[0.0000, 0.4000, 0.8000],
[0.0000, 0.4000, 0.8000],
[0.0000, 0.4000, 0.8000],
[0.0000, 0.4000, 0.8000]]])
3. Same RGB Image - PyTorch Format (CHW):
Shape: torch.Size([3, 5, 5]) - (Channels, Height, Width)
Red channel (first channel):
tensor([[0., 0., 0., 0., 0.],
[0., 1., 1., 1., 0.],
[1., 1., 1., 1., 1.],
[0., 1., 1., 1., 0.],
[0., 0., 0., 0., 0.]])
Green channel (second channel):
tensor([[0.4000, 0.4000, 0.4000, 0.4000, 0.4000],
[0.4000, 0.4000, 0.4000, 0.4000, 0.4000],
[0.4000, 0.4000, 0.4000, 0.4000, 0.4000],
[0.4000, 0.4000, 0.4000, 0.4000, 0.4000],
[0.4000, 0.4000, 0.4000, 0.4000, 0.4000]])
Blue channel (third channel):
tensor([[0.8000, 0.8000, 0.8000, 0.8000, 0.8000],
[0.8000, 0.8000, 0.8000, 0.8000, 0.8000],
[0.8000, 0.8000, 0.8000, 0.8000, 0.8000],
[0.8000, 0.8000, 0.8000, 0.8000, 0.8000],
[0.8000, 0.8000, 0.8000, 0.8000, 0.8000]])
4. Batch of Images (4D tensor):
Shape: torch.Size([2, 3, 5, 5]) - (Batch, Channels, Height, Width)
4 PyTorch: NumPy on Steroids
If you’re coming from TensorFlow, think of PyTorch tensors as NumPy arrays with superpowers. Let’s explore some key similarities and differences:
import numpy as np
import torch

# Build a 2x3 array in each framework so the APIs can be compared side by side
np_array = np.array([[1, 2, 3], [4, 5, 6]])
torch_tensor = torch.tensor([[1, 2, 3], [4, 5, 6]])

print("NumPy array:")
print(np_array)
print("\nPyTorch tensor:")
print(torch_tensor)

# Elementwise arithmetic is written identically in both libraries
print("\nNumPy multiplication:")
print(np_array * 2)
print("\nPyTorch multiplication:")
print(torch_tensor * 2)

# Now the features NumPy does not have
print("\nKey PyTorch Features:")
print("1. GPU Support:")
if torch.cuda.is_available():
    # .cuda() copies the tensor to the default CUDA device
    gpu_tensor = torch_tensor.cuda()
    print(" - Can move to GPU:", gpu_tensor.device)
else:
    print(" - GPU not available, but would be: tensor.cuda()")

print("\n2. Automatic Differentiation:")
x = torch.tensor([2.0], requires_grad=True)
y = x ** 2
y.backward()  # populates x.grad with dy/dx = 2x = 4
print(f" - d(x^2)/dx at x=2: {x.grad}")

print("\n3. Direct Neural Network Integration:")
layer = torch.nn.Linear(3, 1)  # Input size matches our tensor width (3)
print(" - Can directly feed to neural nets:")
output = layer(torch_tensor[0].float())  # Linear layers require float input
print(f" Input shape: {torch_tensor[0].shape}")
print(f" Output shape: {output.shape}")
print(f" Output value: {output}")

# Round-tripping between the two libraries (shares memory where possible)
print("\nConversion:")
print("NumPy -> PyTorch:", torch.from_numpy(np_array))
print("PyTorch -> NumPy:", torch_tensor.numpy())

# Shape manipulations that come up constantly in computer vision
img_tensor = torch.randn(3, 64, 64)  # Random image tensor (CHW format)
print("\nTypical CV Operations:")
print(f"Original shape: {img_tensor.shape}")
print(f"Batch of 4 images: {img_tensor.unsqueeze(0).repeat(4, 1, 1, 1).shape}")
print(f"Normalize to [0,1]: {torch.nn.functional.normalize(img_tensor).shape}")  # NOTE(review): F.normalize applies L2 (Lp) normalization along a dim, not min-max scaling to [0,1]

NumPy array:
[[1 2 3]
[4 5 6]]
PyTorch tensor:
tensor([[1, 2, 3],
[4, 5, 6]])
NumPy multiplication:
[[ 2 4 6]
[ 8 10 12]]
PyTorch multiplication:
tensor([[ 2, 4, 6],
[ 8, 10, 12]])
Key PyTorch Features:
1. GPU Support:
- GPU not available, but would be: tensor.cuda()
2. Automatic Differentiation:
- d(x^2)/dx at x=2: tensor([4.])
3. Direct Neural Network Integration:
- Can directly feed to neural nets:
Input shape: torch.Size([3])
Output shape: torch.Size([1])
Output value: tensor([-1.6290], grad_fn=<ViewBackward0>)
Conversion:
NumPy -> PyTorch: tensor([[1, 2, 3],
[4, 5, 6]])
PyTorch -> NumPy: [[1 2 3]
[4 5 6]]
Typical CV Operations:
Original shape: torch.Size([3, 64, 64])
Batch of 4 images: torch.Size([4, 3, 64, 64])
Normalize to [0,1]: torch.Size([3, 64, 64])
Key advantages of PyTorch over NumPy:
- GPU Acceleration: Move computations to GPU with `.cuda()` or `.to('cuda')`
- Automatic Differentiation: Track gradients with `requires_grad=True`
- Deep Learning Integration: Direct compatibility with `torch.nn` modules
- Dynamic Computation Graphs: Define-by-run approach, more Pythonic
- Built-in CV Operations: Rich ecosystem of vision-related functions
5 OpenCV Image Processing
OpenCV is a powerful library for image processing. Let’s explore some common operations:
import cv2
import numpy as np
from matplotlib import pyplot as plt
def load_sample_image():
    """Build a synthetic test image with a few colored shapes.

    Returns a 300x400 uint8 array in OpenCV's BGR channel order.
    """
    # Start from a black canvas (height 300, width 400, 3 channels)
    canvas = np.zeros((300, 400, 3), dtype=np.uint8)
    # Thickness -1 fills the shape; colors below are (B, G, R)
    cv2.rectangle(canvas, (50, 50), (150, 150), (0, 255, 0), -1)  # Green rectangle
    cv2.circle(canvas, (250, 150), 50, (0, 0, 255), -1)           # Red circle
    cv2.line(canvas, (300, 200), (350, 250), (255, 0, 0), 5)      # Blue line
    return canvas
def show_images(images, titles, figsize=(15, 5)):
    """Render a row of images side by side, one title per image."""
    fig, axes = plt.subplots(1, len(images), figsize=figsize)
    if len(images) == 1:
        # subplots returns a bare Axes (not a list) when there is one plot
        axes = [axes]
    for ax, img, title in zip(axes, images, titles):
        if len(img.shape) == 2:
            # 2-D array: grayscale
            ax.imshow(img, cmap='gray')
        else:
            # 3-D array: OpenCV BGR, convert to RGB for matplotlib
            ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        ax.set_title(title)
        ax.axis('off')
    plt.tight_layout()
    plt.show()

# Load or create sample image
img = load_sample_image()
show_images([img], ['Original Image'])
# 1. Basic Operations
def demonstrate_basic_operations(image):
    """Show grayscale conversion, blurring, and resizing; return the grayscale image."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(image, (7, 7), 0)  # 7x7 kernel, auto sigma
    # Downscale by half, then scale back up — demonstrates interpolation loss
    height, width = image.shape[0], image.shape[1]
    resized = cv2.resize(image, (width // 2, height // 2))
    resized = cv2.resize(resized, (width, height))  # Back to original size
    show_images([gray, blurred, resized],
                ['Grayscale', 'Gaussian Blur', 'Resized'])
    return gray

# Show basic operations
gray = demonstrate_basic_operations(img.copy())
# 2. Edge Detection and Contours
def demonstrate_edge_detection(image, gray):
    """Run Canny edge detection on `gray`, then outline external contours on `image`."""
    edges = cv2.Canny(gray, 50, 150)  # hysteresis thresholds 50/150
    # Only outermost contours, with redundant points compressed
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    annotated = image.copy()
    cv2.drawContours(annotated, contours, -1, (0, 255, 255), 2)  # yellow, 2px
    show_images([edges, annotated],
                ['Canny Edges', 'Contours'])
# Show edge detection and contours
demonstrate_edge_detection(img.copy(), gray)

6 Dataset Setup and Preprocessing
6.1 Downloading the Flowers Dataset
def download_flowers_dataset(root='flower_data'):
    """Download and extract the TF flowers dataset if not already present.

    Parameters
    ----------
    root : str
        Directory to download/extract into (created if missing).

    Returns
    -------
    str
        Path to the extracted ``flower_photos`` directory.
    """
    os.makedirs(root, exist_ok=True)
    url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"
    filename = os.path.join(root, "flower_photos.tgz")
    # Download if not exists
    if not os.path.exists(filename):
        print("Downloading dataset...")
        # timeout prevents hanging forever on a dead connection;
        # raise_for_status stops us from saving an HTML error page as a .tgz
        response = requests.get(url, stream=True, timeout=60)
        response.raise_for_status()
        with open(filename, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
    # Extract if needed
    if not os.path.exists(os.path.join(root, "flower_photos")):
        print("Extracting dataset...")
        # NOTE(review): extractall on an untrusted archive can write outside
        # `root` (path traversal). This URL is trusted, but on Python 3.12+
        # prefer tar.extractall(root, filter="data").
        with tarfile.open(filename) as tar:
            tar.extractall(root)
    return os.path.join(root, "flower_photos")
# Download dataset
dataset_path = download_flowers_dataset()

6.2 Utility Functions for Visualization
def show_batch(images, labels, classes):
    """Plot up to eight images from a batch, each titled with its class name."""
    plt.figure(figsize=(12, 6))
    n_shown = min(8, len(images))
    for idx in range(n_shown):
        plt.subplot(2, 4, idx + 1)
        # Tensors arrive as CHW; matplotlib expects HWC
        plt.imshow(images[idx].permute(1, 2, 0))
        plt.title(classes[labels[idx]])
        plt.axis('off')
    plt.tight_layout()
    plt.show()
def plot_training_history(history):
"""Plot training and validation loss/accuracy"""
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history['train_loss'], label='Train Loss')
plt.plot(history['val_loss'], label='Val Loss')
plt.axvline(x=history['best_epoch'], color='g', linestyle='--', label='Best Model')
plt.title('Loss over epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(history['train_acc'], label='Train Acc')
plt.plot(history['val_acc'], label='Val Acc')
plt.axvline(x=history['best_epoch'], color='g', linestyle='--', label='Best Model')
plt.title('Accuracy over epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.tight_layout()
plt.show()7 Building a Simple Linear Model
class FlowerClassifier(nn.Module):
def __init__(self, img_size=64, hidden_size=128, num_classes=5):
super().__init__()
self.flatten = nn.Flatten()
# Adjust input size based on image dimensions
input_size = img_size * img_size * 3
# Smaller network with stronger regularization
self.fc1 = nn.Linear(input_size, hidden_size)
self.bn1 = nn.BatchNorm1d(hidden_size)
self.relu1 = nn.ReLU()
self.dropout1 = nn.Dropout(0.5) # Increased dropout
self.fc2 = nn.Linear(hidden_size, hidden_size // 2)
self.bn2 = nn.BatchNorm1d(hidden_size // 2)
self.relu2 = nn.ReLU()
self.dropout2 = nn.Dropout(0.5) # Increased dropout
self.fc3 = nn.Linear(hidden_size // 2, num_classes)
self.softmax = nn.Softmax(dim=1)
def forward(self, x):
x = self.flatten(x)
x = self.fc1(x)
x = self.bn1(x)
x = self.relu1(x)
x = self.dropout1(x)
x = self.fc2(x)
x = self.bn2(x)
x = self.relu2(x)
x = self.dropout2(x)
x = self.fc3(x)
return x # Return logits for training
def predict(self, x):
with torch.no_grad():
return self.softmax(self.forward(x)) # Return probabilities for inference8 Training Components
8.1 Loss Function and Metrics
8.2 Training Loop with Early Stopping
from tqdm.notebook import tqdm
def train_model(model, train_loader, val_loader, criterion, optimizer,
num_epochs=30, patience=10, device='cpu'): # Balanced for demo performance
"""
Train the model with early stopping and progress bar, saving best model state
"""
history = {
'train_loss': [], 'train_acc': [],
'val_loss': [], 'val_acc': [],
'best_epoch': 0
}
best_val_loss = float('inf')
best_model_state = None
patience_counter = 0
# Create progress bar for epochs
pbar = tqdm(range(num_epochs), desc="Training")
for epoch in pbar:
# Training phase
model.train()
train_loss = 0
train_acc = 0
batch_count = 0
for inputs, labels in train_loader:
inputs, labels = inputs.to(device), labels.to(device)
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
train_loss += loss.item()
train_acc += calculate_accuracy(outputs, labels)
batch_count += 1
# Update progress bar
pbar.set_postfix({
'train_loss': f'{train_loss/batch_count:.4f}',
'train_acc': f'{train_acc/batch_count:.4f}'
})
# Calculate training metrics
train_loss /= len(train_loader)
train_acc /= len(train_loader)
# Quick validation phase
model.eval()
val_loss = 0
val_acc = 0
with torch.no_grad():
for inputs, labels in val_loader:
inputs, labels = inputs.to(device), labels.to(device)
outputs = model(inputs)
loss = criterion(outputs, labels)
val_loss += loss.item()
val_acc += calculate_accuracy(outputs, labels)
# Calculate validation metrics
val_loss /= len(val_loader)
val_acc /= len(val_loader)
# Update history
history['train_loss'].append(train_loss)
history['train_acc'].append(train_acc)
history['val_loss'].append(val_loss)
history['val_acc'].append(val_acc)
# Update progress bar with validation metrics
pbar.set_postfix({
'train_loss': f'{train_loss:.4f}',
'train_acc': f'{train_acc:.4f}',
'val_loss': f'{val_loss:.4f}',
'val_acc': f'{val_acc:.4f}'
})
# Save best model
if val_loss < best_val_loss:
best_val_loss = val_loss
best_model_state = model.state_dict().copy()
history['best_epoch'] = epoch
patience_counter = 0
else:
patience_counter += 1
if patience_counter >= patience:
print(f'\nEarly stopping triggered at epoch {epoch}. Best was epoch {history["best_epoch"]}')
break
# Restore best model state
if best_model_state is not None:
model.load_state_dict(best_model_state)
print(f'\nRestored best model from epoch {history["best_epoch"]}')
return history9 Model Training and Evaluation
def prepare_data(max_images_per_class=100, img_size=64):
    """Prepare a small subset of the flowers dataset with train/val/test splits.

    Parameters
    ----------
    max_images_per_class : int
        Cap on images kept per class (keeps the demo fast and class-balanced).
    img_size : int
        Images are resized to (img_size, img_size).

    Returns
    -------
    tuple
        (train_loader, val_loader, test_loader, class_names)
    """
    transform = transforms.Compose([
        transforms.Resize((img_size, img_size)),  # Smaller images
        transforms.ToTensor(),
        # Standard ImageNet normalization statistics
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    # Load full dataset
    dataset = datasets.ImageFolder(dataset_path, transform=transform)

    # Select up to `max_images_per_class` indices per class.
    # PERF FIX: read labels from `dataset.targets` instead of iterating the
    # dataset itself — the original iterated `dataset` twice, which decodes
    # every image from disk just to look at its label.
    selected_indices = []
    class_counters = {label: 0 for label in set(dataset.targets)}
    for idx, label in enumerate(dataset.targets):
        if class_counters[label] < max_images_per_class:
            selected_indices.append(idx)
            class_counters[label] += 1
            if all(count >= max_images_per_class for count in class_counters.values()):
                break
    # Create subset dataset
    subset_dataset = torch.utils.data.Subset(dataset, selected_indices)

    # Split into train/val/test (60%/20%/20%)
    total_size = len(subset_dataset)
    train_size = int(0.6 * total_size)
    val_size = int(0.2 * total_size)
    test_size = total_size - train_size - val_size  # remainder avoids rounding loss
    train_dataset, val_dataset, test_dataset = random_split(
        subset_dataset,
        [train_size, val_size, test_size],
        generator=torch.Generator().manual_seed(42)  # For reproducibility
    )

    # Create data loaders (only the training loader shuffles)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32)
    test_loader = DataLoader(test_dataset, batch_size=32)

    print(f"Dataset size: {total_size} images")
    print(f"Training set: {len(train_dataset)} images")
    print(f"Validation set: {len(val_dataset)} images")
    print(f"Test set: {len(test_dataset)} images")
    return train_loader, val_loader, test_loader, dataset.classes
# Prepare data and model
train_loader, val_loader, test_loader, classes = prepare_data()
model = FlowerClassifier()
optimizer = optim.Adam(model.parameters(), lr=0.0005, weight_decay=0.01) # Added L2 regularization
# Train model (using validation set for early stopping)
history = train_model(model, train_loader, val_loader, criterion, optimizer)
# Plot training history
plot_training_history(history)
print("\nEvaluating final model on test set (never seen during training):")
model.eval()
test_loss = 0
test_acc = 0
with torch.no_grad():
for inputs, labels in test_loader:
outputs = model(inputs)
loss = criterion(outputs, labels)
test_loss += loss.item()
test_acc += calculate_accuracy(outputs, labels)
test_loss /= len(test_loader)
test_acc /= len(test_loader)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")Dataset size: 500 images
Training set: 300 images
Validation set: 100 images
Test set: 100 images
Restored best model from epoch 22
Evaluating final model on test set (never seen during training):
Test Loss: 1.5907
Test Accuracy: 0.3359
10 Making Predictions
def evaluate_and_visualize(model, data_loader, classes, dataset_name="Test", num_samples=8):
    """Evaluate model predictions and visualize results on a given dataset.

    Shows up to `num_samples` sample predictions, a confusion matrix heatmap,
    and overall accuracy for the batches in `data_loader`.
    """
    model.eval()
    all_preds = []
    all_labels = []
    images_to_show = []
    with torch.no_grad():
        # BUG FIX: the original iterated the global `val_loader` here, ignoring
        # the `data_loader` argument — so the "Test set" results were actually
        # computed on the validation set.
        for inputs, labels in data_loader:
            outputs = model.predict(inputs)
            _, predicted = torch.max(outputs, 1)
            all_preds.extend(predicted.numpy())
            all_labels.extend(labels.numpy())
            if len(images_to_show) < num_samples:
                images_to_show.extend([(img, pred, label)
                                       for img, pred, label in zip(inputs, predicted, labels)])
    # Plot sample predictions
    plt.figure(figsize=(15, 6))
    for i, (img, pred, label) in enumerate(images_to_show[:num_samples]):
        plt.subplot(2, 4, i + 1)
        # CHW -> HWC; images are still normalized, so imshow will clip values
        img = img.permute(1, 2, 0)
        plt.imshow(img)
        color = 'green' if pred == label else 'red'
        plt.title(f'Pred: {classes[pred]}\nTrue: {classes[label]}',
                  color=color)
        plt.axis('off')
    plt.tight_layout()
    plt.show()
    # Plot confusion matrix
    cm = confusion_matrix(all_labels, all_preds)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=classes, yticklabels=classes)
    plt.title(f'Confusion Matrix ({dataset_name} Set)')
    plt.xlabel('Predicted Class')
    plt.ylabel('True Class')
    plt.show()
    # Print summary statistics
    accuracy = np.sum(np.array(all_preds) == np.array(all_labels)) / len(all_labels)
    print(f"\n{dataset_name} Set Summary:")
    print(f"Overall Accuracy: {accuracy:.4f}")
    print(f"Total Samples: {len(all_labels)}")
# Evaluate and visualize results on test set
evaluate_and_visualize(model, test_loader, classes, "Test")

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Got range [-1.8267832..1.7522904].
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Got range [-2.117904..2.3760502].
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Got range [-2.117904..1.8379141].
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Got range [-2.1007793..2.1519828].
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Got range [-2.117904..2.2565577].
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Got range [-2.0357141..2.2739873].
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Got range [-2.0182073..2.465708].
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Got range [-1.9980307..2.0648367].
Test Set Summary:
Overall Accuracy: 0.4500
Total Samples: 100
11 Next Steps
- Experiment with different model architectures
- Try different optimizers (SGD, RMSprop)
- Implement data augmentation
- Use transfer learning with pre-trained models